Lung ILCs (immune cells with partly residual T/B which need be kept
in this version)
loading 50k cells, demo only get 13,474
(to increase datasize could get some more )
0604: plus data called 20k more in cellranger, total 35,367
cells
(as BAS/EOS/NEU relatively get much lower nGene comparing with
ILCs/Macrophages)
(to get much better resolution of those granulocytes, may should
increase datasize by 1x to 3x)
20240407
rerun initial process with currently modified workflow for the
two-year-ago data
separate LUNG and BALF after preAnno
individual LUNG data plot several figures
source("/Shared_win/projects/RNA_normal/analysis.10x.r")
filt.10x <- Read10X(data.dir = "J:/projects_local/projects/20220506_10x_RZB/output_jiace0518/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`
dim(GEX)
## [1] 32285 35367
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGACATAAC-1 AAACCCAAGAGCATTA-1 AAACCCAAGCATCAAA-1
## Xkr4 . . .
## Gm1992 . . .
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
## AAACCCAAGCCGGAAT-1 AAACCCAAGCTAAACA-1 AAACCCAAGGTAAGGA-1
## Xkr4 . . .
## Gm1992 . . .
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
dim(FB)
## [1] 8 35367
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGACATAAC-1 AAACCCAAGAGCATTA-1 AAACCCAAGCATCAAA-1
## hashtag1_TotalSeqB 8 11 4566
## hashtag2_TotalSeqB 33 11 132
## hashtag3_TotalSeqB 20 110 46
## hashtag4_TotalSeqB 8 4 238
## hashtag5_TotalSeqB 82 6 24
## hashtag6_TotalSeqB 11 4 31
## hashtag7_TotalSeqB 5 139 26
## hashtag8_TotalSeqB 24 14 64
## AAACCCAAGCCGGAAT-1 AAACCCAAGCTAAACA-1 AAACCCAAGGTAAGGA-1
## hashtag1_TotalSeqB 15 29 18
## hashtag2_TotalSeqB 14 452 388
## hashtag3_TotalSeqB 10 22 12
## hashtag4_TotalSeqB 7 12 7
## hashtag5_TotalSeqB 9 8 6
## hashtag6_TotalSeqB 17 1168 10
## hashtag7_TotalSeqB 11 12 5
## hashtag8_TotalSeqB 179 20 13
# change the rownames as sample names
rownames(FB) <- c("LUNG.CTL1","LUNG.CTL2",
"LUNG.CKO1","LUNG.CKO2",
"BALF.CTL1","BALF.CTL2",
"BALF.CKO1","BALF.CKO2")
dim(FB)
## [1] 8 35367
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGACATAAC-1 AAACCCAAGAGCATTA-1 AAACCCAAGCATCAAA-1
## LUNG.CTL1 8 11 4566
## LUNG.CTL2 33 11 132
## LUNG.CKO1 20 110 46
## LUNG.CKO2 8 4 238
## BALF.CTL1 82 6 24
## BALF.CTL2 11 4 31
## BALF.CKO1 5 139 26
## BALF.CKO2 24 14 64
## AAACCCAAGCCGGAAT-1 AAACCCAAGCTAAACA-1 AAACCCAAGGTAAGGA-1
## LUNG.CTL1 15 29 18
## LUNG.CTL2 14 452 388
## LUNG.CKO1 10 22 12
## LUNG.CKO2 7 12 7
## BALF.CTL1 9 8 6
## BALF.CTL2 17 1168 10
## BALF.CKO1 11 12 5
## BALF.CKO2 179 20 13
rowSums(FB)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
## 5088467 6731641 5112068 2865750 2650375 3758840 3147575 4466945
rowMeans(FB)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
## 143.87613 190.33678 144.54344 81.02893 74.93921 106.28100 88.99751 126.30263
apply(FB,1,max)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
## 18197 12828 10479 5878 8190 12873 6653 15314
apply(FB,1,median)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
## 21 30 23 11 11 16 14 23
rowSums(FB>0)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
## 35357 35358 35355 35280 35273 35355 35339 35357
# shorten rownames
#rownames(FB) <- paste0("hashtag",1:8)
# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Lung_FB")
FB.seur
## An object of class Seurat
## 8 features across 35367 samples within 1 assay
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "LUNG.CTL1" "LUNG.CTL2" "LUNG.CKO1" "LUNG.CKO2" "BALF.CTL1" "BALF.CTL2"
## [7] "BALF.CKO1" "BALF.CKO2"
perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html
# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features
first define FB colors based on conditions
color.FB <- c(ggsci::pal_d3("category20c")(20)[c(1,6,2,7)],
ggsci::pal_d3("category20b")(20)[c(7,12,13,18)])
#color.FB <- color.FB[c()]
color.FBraw <- c("#FF6C91","lightgrey",color.FB)
color.cnt <- color.FB[c(1,3,5,7)]
scales::show_col(color.FB, ncol = 4)
scales::show_col(color.FBraw, ncol = 6)
scales::show_col(color.cnt, ncol = 2)
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
plot(sort(t(FB)[,2],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
plot(sort(t(FB)[,3],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
plot(sort(t(FB)[,4],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
plot(sort(t(FB)[,5],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
plot(sort(t(FB)[,6],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
plot(sort(t(FB)[,7],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
plot(sort(t(FB)[,8],decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
#table.demux
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")
plot(table.demux$quantile, pch=16, ylab="mod-quantile")
#plot.new()
plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])
#plot(table.demux[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux)[2+9])
#plot(table.demux[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux)[2+10])
options(max.print=5000)
#table.demux.s
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")
plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
#plot.new()
plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])
#plot(table.demux.s[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux.s)[2+9])
#plot(table.demux.s[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux.s)[2+10])
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.90)
## Cutoff for LUNG.CTL1 : 78 reads
## Cutoff for LUNG.CTL2 : 105 reads
## Cutoff for LUNG.CKO1 : 82 reads
## Cutoff for LUNG.CKO2 : 43 reads
## Cutoff for BALF.CTL1 : 37 reads
## Cutoff for BALF.CTL2 : 58 reads
## Cutoff for BALF.CKO1 : 53 reads
## Cutoff for BALF.CKO2 : 82 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 13131 489 21747
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 13131 489 2773 2598 2813 2953 2730 2460
## BALF.CKO1 BALF.CKO2
## 2907 2513
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.91)
## Cutoff for LUNG.CTL1 : 81 reads
## Cutoff for LUNG.CTL2 : 109 reads
## Cutoff for LUNG.CKO1 : 86 reads
## Cutoff for LUNG.CKO2 : 45 reads
## Cutoff for BALF.CTL1 : 38 reads
## Cutoff for BALF.CTL2 : 61 reads
## Cutoff for BALF.CKO1 : 55 reads
## Cutoff for BALF.CKO2 : 86 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 13043 538 21786
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 13043 538 2773 2610 2807 2962 2733 2463
## BALF.CKO1 BALF.CKO2
## 2920 2518
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.92)
## Cutoff for LUNG.CTL1 : 85 reads
## Cutoff for LUNG.CTL2 : 114 reads
## Cutoff for LUNG.CKO1 : 90 reads
## Cutoff for LUNG.CKO2 : 47 reads
## Cutoff for BALF.CTL1 : 40 reads
## Cutoff for BALF.CTL2 : 64 reads
## Cutoff for BALF.CKO1 : 58 reads
## Cutoff for BALF.CKO2 : 90 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 12922 623 21822
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12922 623 2782 2613 2808 2971 2722 2477
## BALF.CKO1 BALF.CKO2
## 2930 2519
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.93)
## Cutoff for LUNG.CTL1 : 90 reads
## Cutoff for LUNG.CTL2 : 120 reads
## Cutoff for LUNG.CKO1 : 94 reads
## Cutoff for LUNG.CKO2 : 50 reads
## Cutoff for BALF.CTL1 : 42 reads
## Cutoff for BALF.CTL2 : 67 reads
## Cutoff for BALF.CKO1 : 61 reads
## Cutoff for BALF.CKO2 : 94 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 12804 722 21841
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12804 722 2783 2616 2804 2985 2720 2493
## BALF.CKO1 BALF.CKO2
## 2936 2504
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.94)
## Cutoff for LUNG.CTL1 : 95 reads
## Cutoff for LUNG.CTL2 : 127 reads
## Cutoff for LUNG.CKO1 : 99 reads
## Cutoff for LUNG.CKO2 : 53 reads
## Cutoff for BALF.CTL1 : 44 reads
## Cutoff for BALF.CTL2 : 71 reads
## Cutoff for BALF.CKO1 : 65 reads
## Cutoff for BALF.CKO2 : 100 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 12688 875 21804
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12688 875 2773 2614 2792 2978 2721 2500
## BALF.CKO1 BALF.CKO2
## 2945 2481
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.95)
## Cutoff for LUNG.CTL1 : 101 reads
## Cutoff for LUNG.CTL2 : 135 reads
## Cutoff for LUNG.CKO1 : 106 reads
## Cutoff for LUNG.CKO2 : 57 reads
## Cutoff for BALF.CTL1 : 47 reads
## Cutoff for BALF.CTL2 : 75 reads
## Cutoff for BALF.CKO1 : 69 reads
## Cutoff for BALF.CKO2 : 106 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 12525 1044 21798
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12525 1044 2769 2605 2794 2984 2714 2513
## BALF.CKO1 BALF.CKO2
## 2953 2466
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.96)
## Cutoff for LUNG.CTL1 : 108 reads
## Cutoff for LUNG.CTL2 : 145 reads
## Cutoff for LUNG.CKO1 : 113 reads
## Cutoff for LUNG.CKO2 : 61 reads
## Cutoff for BALF.CTL1 : 50 reads
## Cutoff for BALF.CTL2 : 81 reads
## Cutoff for BALF.CKO1 : 74 reads
## Cutoff for BALF.CKO2 : 113 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 12320 1262 21785
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12320 1262 2743 2604 2788 2991 2702 2533
## BALF.CKO1 BALF.CKO2
## 2971 2453
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.97)
## Cutoff for LUNG.CTL1 : 117 reads
## Cutoff for LUNG.CTL2 : 157 reads
## Cutoff for LUNG.CKO1 : 123 reads
## Cutoff for LUNG.CKO2 : 67 reads
## Cutoff for BALF.CTL1 : 55 reads
## Cutoff for BALF.CTL2 : 88 reads
## Cutoff for BALF.CKO1 : 81 reads
## Cutoff for BALF.CKO2 : 123 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 12067 1642 21658
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12067 1642 2723 2604 2786 2988 2657 2546
## BALF.CKO1 BALF.CKO2
## 2966 2388
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.98)
## Cutoff for LUNG.CTL1 : 131 reads
## Cutoff for LUNG.CTL2 : 175 reads
## Cutoff for LUNG.CKO1 : 136 reads
## Cutoff for LUNG.CKO2 : 75 reads
## Cutoff for BALF.CTL1 : 61 reads
## Cutoff for BALF.CTL2 : 97 reads
## Cutoff for BALF.CKO1 : 90 reads
## Cutoff for BALF.CKO2 : 137 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 11692 2205 21470
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 11692 2205 2680 2610 2764 2985 2612 2579
## BALF.CKO1 BALF.CKO2
## 2951 2289
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for LUNG.CTL1 : 153 reads
## Cutoff for LUNG.CTL2 : 206 reads
## Cutoff for LUNG.CKO1 : 160 reads
## Cutoff for LUNG.CKO2 : 88 reads
## Cutoff for BALF.CTL1 : 71 reads
## Cutoff for BALF.CTL2 : 114 reads
## Cutoff for BALF.CKO1 : 107 reads
## Cutoff for BALF.CKO2 : 160 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 10966 3395 21006
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 10966 3395 2576 2621 2666 2984 2517 2629
## BALF.CKO1 BALF.CKO2
## 2891 2122
## q0.99
#cutoff.FB <- c(153,206,160,88,
# 71,114,107,160)
#
## ref to
# LUNG.CTL1 0.93
# LUNG.CTL2 0.94
# LUNG.CKO1 0.93/0.91
# LUNG.CKO2 0.99/0.97
# BALF.CTL1 0.91/0.93
# BALF.CTL2 0.99
# BALF.CKO1 0.96
# BALF.CKO2 0.92
# manual
cutoff.FB <- c(90,127,86,67,
42,114,74,90)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
## 90 127 86 67 42 114 74 90
data.frame(cutoff=cutoff.FB)
## cutoff
## LUNG.CTL1 90
## LUNG.CTL2 127
## LUNG.CKO1 86
## LUNG.CKO2 67
## BALF.CTL1 42
## BALF.CTL2 114
## BALF.CKO1 74
## BALF.CKO2 90
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
# define decision matrix
dd <- FBobj@assays[['HTO']]@counts
dd <- apply(dd, 2, function(x){
x > FBcutoff
})
x1 <- colSums(dd)
x1[x1>1] <- "Doublet"
x1[x1==0] <- "Negative"
x1[x1==1] <- "Singlet"
x2 <- x1
bc.slt <- names(x1)[x1=="Singlet"]
for(bc in bc.slt){
x2[bc] <- rownames(dd)[dd[,bc]]
}
FBdf <- data.frame(HTO_classification.global=factor(x1,
levels = c("Doublet","Negative","Singlet")),
hash.ID=factor(x2,
levels = c("Doublet","Negative",rownames(dd))))
return(FBdf)
}
df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 35367 2
df.FB[1:10,]
## HTO_classification.global hash.ID
## AAACCCAAGACATAAC-1 Singlet BALF.CTL1
## AAACCCAAGAGCATTA-1 Doublet Doublet
## AAACCCAAGCATCAAA-1 Doublet Doublet
## AAACCCAAGCCGGAAT-1 Singlet BALF.CKO2
## AAACCCAAGCTAAACA-1 Doublet Doublet
## AAACCCAAGGTAAGGA-1 Singlet LUNG.CTL2
## AAACCCAAGGTCACCC-1 Doublet Doublet
## AAACCCAAGTGGCCTC-1 Doublet Doublet
## AAACCCACAAACGTGG-1 Singlet LUNG.CKO2
## AAACCCACAACACAAA-1 Singlet LUNG.CTL2
## custom cutoff run this
table(df.FB)
## hash.ID
## HTO_classification.global Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1
## Doublet 12663 0 0 0 0
## Negative 0 902 0 0 0
## Singlet 0 0 2814 2626 2859
## hash.ID
## HTO_classification.global LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
## Doublet 0 0 0 0 0
## Negative 0 0 0 0 0
## Singlet 2905 2745 2399 2922 2532
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
## orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGACATAAC-1 Lung_FB 191 8 191 8
## AAACCCAAGAGCATTA-1 Lung_FB 299 8 299 8
## AAACCCAAGCATCAAA-1 Lung_FB 5127 8 5127 8
## AAACCCAAGCCGGAAT-1 Lung_FB 262 8 262 8
## HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGACATAAC-1 BALF.CTL1 LUNG.CTL2 1.1966616 BALF.CTL1
## AAACCCAAGAGCATTA-1 BALF.CKO1 LUNG.CKO1 0.6018605 BALF.CKO1
## AAACCCAAGCATCAAA-1 LUNG.CTL1 LUNG.CKO2 2.2958784 LUNG.CKO2_LUNG.CTL1
## AAACCCAAGCCGGAAT-1 BALF.CKO2 BALF.CTL2 1.3042646 BALF.CKO2
## HTO_classification.global hash.ID
## AAACCCAAGACATAAC-1 Singlet BALF.CTL1
## AAACCCAAGAGCATTA-1 Doublet Doublet
## AAACCCAAGCATCAAA-1 Doublet Doublet
## AAACCCAAGCCGGAAT-1 Singlet BALF.CKO2
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
# levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
# levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,25000),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1550,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
cols = color.FB)
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort")
# first, remove negative cells from th object
#FB.seur.subset <- FB.seur
# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"
#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)
#saveRDS(FB.seur.subset, "FB0407.seur.subset.rds")
FB.seur.subset <- readRDS("FB0407.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(rownames(FB.seur)),
cols = color.FB) + labs(title = "FB Max_ID"),
DimPlot(FB.seur.subset, order = c("Doublet",rev(rownames(FB.seur)),"Negative"), group.by = 'hash.ID.sort', label = F,
cols = c("lightgrey",color.FB,"#FF6C91")) +
labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
Idents(FB.seur) <- "HTO_classification.global"
FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),levels = c("Doublet","Negative","Singlet"))
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#FF6C91","lightgrey","#7CAE00")) &
geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)
table(FB.seur@meta.data$hash.ID.sort)
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12663 902 2814 2626 2859 2905 2745 2399
## BALF.CKO1 BALF.CKO2
## 2922 2532
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,15500),col = color.FBraw,
main = "Feature Barcode statistics",cex.names = 0.75)
text(x=1:10*1.2-0.45,y=sl_stat+645,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html
GEX.seur <- CreateSeuratObject(counts = GEX,
min.cells = 3,
min.features = 100,
project = "Lung_GEX")
GEX.seur
## An object of class Seurat
## 18826 features across 35358 samples within 1 assay
## Active assay: RNA (18826 features, 0 variable features)
grep("^mt-",rownames(GEX),value = T)
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)
RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)
# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
##
## Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2
## 12663 898 2814 2625 2858 2904 2744 2399
## BALF.CKO1 BALF.CKO2
## 2921 2532
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat
## 18826 features across 21797 samples within 1 assay
## Active assay: RNA (18826 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
## keep low nFeature for granulocytes specifically neutrophils
#GEX.seur <- subset(GEX.seur, subset = percent.mt < 15 & nFeature_RNA >= 500 & nFeature_RNA < 6000 & nCount_RNA < 40000)
GEX.seur <- subset(GEX.seur, subset = percent.mt < 15 & nFeature_RNA >= 100 & nFeature_RNA < 6000 & nCount_RNA < 32000)
GEX.seur
## An object of class Seurat
## 18826 features across 21447 samples within 1 assay
## Active assay: RNA (18826 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,14800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,4800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+325,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))
GEX.seur <- CellCycleScoring(GEX.seur,
s.features = s.genes,
g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info",
ncol = 2, pt.size = 0.1)
GEX.seur.cc <- GEX.seur
GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)
GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')
GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(GEX.seur), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2
need to remove TCR/BCR genes as DIGs
head(VariableFeatures(GEX.seur), 200)
## [1] "Igkc" "Ighm" "Jchain" "Mzb1"
## [5] "Ccl22" "Retnla" "Gm15056" "Mfge8"
## [9] "Mucl1" "Aldh1a2" "Ccl7" "Ccl8"
## [13] "Hmox1" "Il13" "Areg" "Ngp"
## [17] "Iglc2" "Ccl17" "Ccl2" "Il12b"
## [21] "Fscn1" "Ly6d" "C1qa" "Camp"
## [25] "Iglc1" "Spp1" "AY761185" "Ltf"
## [29] "Pf4" "Calca" "Igha" "Retnlg"
## [33] "Pou2af1" "Ccl1" "Cxcl10" "Tpsab1"
## [37] "Tph1" "Clu" "Klk1" "Cpa3"
## [41] "Rbp4" "Cd209e" "Arg1" "Fkbp11"
## [45] "Apoe" "Cd209d" "Siglech" "Ccr7"
## [49] "Cxcl3" "Csf2" "Igkv14-126" "Timp1"
## [53] "Ccl24" "Fabp4" "Ms4a1" "C1qc"
## [57] "Cxcl9" "Marco" "Il5" "Mgl2"
## [61] "C1qb" "Ly6a" "Ldoc1" "Cd209a"
## [65] "Il6" "Gpnmb" "Il4i1" "Cxcl1"
## [69] "Inhba" "Ccr9" "Mmp12" "Il17a"
## [73] "Cd79a" "Ccl12" "Lyz1" "Mmp13"
## [77] "Mcpt8" "Saa3" "Ch25h" "Iglc3"
## [81] "Hist1h1b" "Il2ra" "Rpl39l" "Cst3"
## [85] "H2-M2" "Rsad2" "Gm14275" "Il10"
## [89] "Cstdc5" "Ube2c" "Zmynd15" "Derl3"
## [93] "Cox6a2" "Gzmb" "Cma1" "Serpina3g"
## [97] "Gata2" "Krt79" "Fbp1" "Alox15"
## [101] "Ctsk" "Ckb" "Akap12" "Cstdc4"
## [105] "Tff1" "Syt7" "Ctla2a" "Gm26917"
## [109] "Hspa1a" "Gm13546" "Mt2" "Xcr1"
## [113] "Snhg1" "Iglv1" "Top2a" "Gzmc"
## [117] "AA467197" "Hist1h2ae" "Snhg12" "Scgb1a1"
## [121] "Slpi" "Cd207" "Lcn2" "Spata7"
## [125] "Tbc1d4" "Tnfrsf17" "Hist1h2ap" "Cd163l1"
## [129] "Dcstamp" "Hspa1b" "Sept3" "Edn1"
## [133] "Cacnb3" "Asic3" "Eaf2" "1500009L16Rik"
## [137] "Penk" "Scin" "Ear1" "Klk4"
## [141] "Pkib" "Pclaf" "Il17f" "Hba-a1"
## [145] "Hpgd" "Car4" "Ctla4" "Igkv10-96"
## [149] "Mki67" "Mt3" "Xcl1" "Cbr2"
## [153] "2410006H16Rik" "Cd79b" "Timd4" "Plet1"
## [157] "Stfa2" "Wfdc21" "Fcer1a" "Serpinb6b"
## [161] "Pcsk1" "Igfbp4" "Hbb-bs" "Fcrls"
## [165] "Pcp4" "Gzma" "Nos2" "H2-Eb1"
## [169] "Igkv12-44" "Wfdc2" "Hba-a2" "Tcf4"
## [173] "Txndc5" "Cenpf" "Gm21762" "Hamp"
## [177] "Tnfrsf4" "Hmgb2" "Ppbp" "Ccdc80"
## [181] "Ebf1" "Ifi44" "Ccl5" "Lpl"
## [185] "Tnf" "Rrm2" "Rnase2a" "Nusap1"
## [189] "Ear2" "Il1rl1" "Cyp7b1" "Ctsl"
## [193] "Hs3st1" "Zfas1" "Ms4a4c" "Cyp11a1"
## [197] "Tcaf2" "Cfb" "Ear6" "H2-Ab1"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix
# exclude MT genes and more
DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,
DIG,
CC_gene) )
GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1
## Positive: S100a9, S100a8, Isg15, Acod1, Hdc, Ccl4, Ccrl2, Asprv1, Ifitm1, Rsad2
## Egr1, Clec4e, Ccl3, Isg20, Ifit1, Lcn2, Ifitm3, Cstdc4, Chil1, Retnlg
## Stfa2l1, Mrgpra2b, Cmpk2, Cxcl2, Ifit3b, Oasl2, Gbp2b, Ly6e, Ptgs2, Scgb1a1
## Negative: Anxa5, Ppp1r14b, Atp5g1, Tuba1b, Myof, Lpl, Chchd10, Prdx1, Mrc1, Slc7a2
## Rpn1, Vat1, Cisd1, Dbi, Sdf2l1, Nrp2, Atp6v0d2, Pdia6, Anxa4, Pdia4
## Axl, Ptma, Calr, Ranbp1, Plet1, Fabp5, S100a1, Ctss, Shtn1, Lgmn
## PC_ 2
## Positive: Chil3, Ctsd, Ccl6, Lyz2, Ctsb, Ctss, Atp6v0d2, Slc7a2, Mrc1, F7
## Cybb, Clec4a3, Itgax, Ccl9, F10, Lgals3, Myof, Anxa4, Lrp1, Lpl
## Ear2, Fn1, Plet1, Tgm2, Wfdc17, Nrp2, Mgll, Cd68, Dab2, Rnase2a
## Negative: Gata3, Rora, Il2ra, Hlf, Ptprcap, Ctla2a, Pdcd1, Icos, Dut, Skap1
## Selenoh, Itgb7, Pclaf, Sh2d2a, Il1rl1, Lig1, Itk, Ar, Cxcr6, Smco4
## Nrip1, Ptpn13, Plod2, Dkk3, Ptpn22, Ikzf3, Nup210, Pmepa1, Ctla4, Ramp1
## PC_ 3
## Positive: Ctsd, Chil3, Atp6v0d2, Mgll, Ctsk, Car4, Fpr2, Krt79, Ch25h, Slc7a2
## Bhlhe41, Abcg1, Kcnn3, Ms4a8a, Dst, Abcc5, Cfb, Plet1, F7, Trim29
## Sort1, Phgdh, Olr1, B3gnt7, Cisd1, Cidec, Il11ra1, Pld3, Abcd2, Emilin1
## Negative: Cd74, H2-Aa, H2-Ab1, H2-Eb1, H2-DMb1, Cbfa2t3, H2-DMa, Cd83, Aif1, Ms4a6c
## Ccr2, S100a4, Pld4, Cd86, Sdc4, Tmem176b, Ifi30, Napsa, Plbd1, Ms4a4c
## Syngr2, Mafb, Tmem176a, Rnase6, Ahr, Ciita, Cst3, Ccl24, Mgl2, Ece1
## PC_ 4
## Positive: Ifitm3, S100a9, S100a8, Cxcl2, Isg15, Pclaf, Ccrl2, Acod1, Rsad2, Ccna2
## Spc24, Cst3, Wfdc17, Kif15, Hdc, Clec4e, Cenpm, Knl1, Ifit1, Isg20
## Asf1b, Asprv1, Dut, Egr1, Tk1, Clec4n, Ctsb, Hmgb3, Atf3, Plbd1
## Negative: Nkg7, Gzma, Ccl5, Irf8, Prf1, Klrd1, Serpinb9, Ms4a4b, Eomes, Gzmb
## Ncr1, Satb1, Serpinb6b, Atp1b1, Bcl2, Serpinb9b, Klri2, Klra3, Klrc1, Cma1
## Sh2d2a, Gramd3, Cd7, Ccnd2, Il18r1, Ptprcap, Cd28, Klra7, Lgals1, Kcnq1ot1
## PC_ 5
## Positive: Ccl22, Aldh1a2, Cacnb3, Ccl17, Plppr4, Il4i1, Lad1, Fscn1, Cxcl16, Nudt17
## Ccr7, Ramp3, Socs2, Nr4a3, Tbc1d4, Tspan33, Palld, Mir155hg, Flrt3, Strip2
## Sema6d, Tspan3, Dcstamp, Zmynd15, Mfge8, Rora, Rgs1, Il7r, Bcl2a1d, Cxcr6
## Negative: Siglech, Klk1, Cox6a2, Ccr9, Spib, Bcl11a, Tcf4, Cadm1, Mef2c, Klk1b27
## Rnase6, Pacsin1, Kmo, Lifr, Smim5, Sh3bgr, Ly6c2, Pld4, Fcrla, Plac8
## Bst2, Cyb561a3, Ly6c1, Ly6d, Flt3, Tmem108, Ly6e, Pltp, Upb1, Pou2af1
length(setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene)))
## [1] 1265
setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene))[1:300]
## [1] "Ccl22" "Retnla" "Mfge8" "Mucl1" "Aldh1a2" "Ccl7"
## [7] "Ccl8" "Hmox1" "Il13" "Areg" "Ngp" "Ccl17"
## [13] "Ccl2" "Il12b" "Fscn1" "Ly6d" "C1qa" "Camp"
## [19] "Spp1" "Ltf" "Pf4" "Calca" "Retnlg" "Pou2af1"
## [25] "Ccl1" "Cxcl10" "Tpsab1" "Tph1" "Clu" "Klk1"
## [31] "Cpa3" "Rbp4" "Cd209e" "Arg1" "Fkbp11" "Apoe"
## [37] "Cd209d" "Siglech" "Ccr7" "Cxcl3" "Csf2" "Timp1"
## [43] "Ccl24" "Fabp4" "Ms4a1" "C1qc" "Cxcl9" "Marco"
## [49] "Il5" "Mgl2" "C1qb" "Ly6a" "Ldoc1" "Cd209a"
## [55] "Il6" "Gpnmb" "Il4i1" "Cxcl1" "Inhba" "Ccr9"
## [61] "Mmp12" "Il17a" "Cd79a" "Ccl12" "Lyz1" "Mmp13"
## [67] "Mcpt8" "Saa3" "Ch25h" "Il2ra" "Cst3" "H2-M2"
## [73] "Rsad2" "Il10" "Cstdc5" "Zmynd15" "Derl3" "Cox6a2"
## [79] "Gzmb" "Cma1" "Serpina3g" "Gata2" "Krt79" "Fbp1"
## [85] "Alox15" "Ctsk" "Ckb" "Akap12" "Cstdc4" "Tff1"
## [91] "Syt7" "Ctla2a" "Mt2" "Xcr1" "Snhg1" "Gzmc"
## [97] "Snhg12" "Scgb1a1" "Slpi" "Cd207" "Lcn2" "Spata7"
## [103] "Tbc1d4" "Tnfrsf17" "Cd163l1" "Dcstamp" "Sept3" "Edn1"
## [109] "Cacnb3" "Asic3" "Eaf2" "Penk" "Scin" "Ear1"
## [115] "Klk4" "Pkib" "Pclaf" "Il17f" "Hpgd" "Car4"
## [121] "Ctla4" "Mt3" "Xcl1" "Cbr2" "Cd79b" "Timd4"
## [127] "Plet1" "Stfa2" "Wfdc21" "Fcer1a" "Serpinb6b" "Pcsk1"
## [133] "Igfbp4" "Fcrls" "Pcp4" "Gzma" "Nos2" "H2-Eb1"
## [139] "Wfdc2" "Tcf4" "Txndc5" "Hamp" "Tnfrsf4" "Ppbp"
## [145] "Ccdc80" "Ebf1" "Ifi44" "Ccl5" "Lpl" "Tnf"
## [151] "Rnase2a" "Ear2" "Il1rl1" "Cyp7b1" "Ctsl" "Hs3st1"
## [157] "Zfas1" "Ms4a4c" "Cyp11a1" "Tcaf2" "Cfb" "Ear6"
## [163] "H2-Ab1" "Cd36" "Mt1" "Klk1b11" "Hlf" "Cd40"
## [169] "Bcl11a" "Slc1a2" "S100a1" "Ifit1" "Ly6g" "Il4"
## [175] "Fam71f2" "Il1a" "Fn1" "Bex1" "Serpinb2" "Gata3"
## [181] "Ifit2" "Krt18" "Hapln3" "Ereg" "Epcam" "H2-Aa"
## [187] "Fabp5" "Mmp19" "Dut" "Nudt17" "Thbs1" "Igf1"
## [193] "Ccl6" "Slc7a2" "Itgae" "Ccn3" "Atp6v0d2" "Tnfsf8"
## [199] "Ccl4" "Il2" "Chil4" "Ly6c2" "Lmo7" "Bglap3"
## [205] "Pdcd1" "Mgll" "Mcpt1" "Mafb" "F13a1" "Calcb"
## [211] "Ikzf2" "Gas5" "Ccl9" "Hal" "Plppr4" "Apod"
## [217] "Rgs6" "Sdc4" "Ifitm6" "Mrc1" "Cxcl16" "Cd74"
## [223] "Fpr2" "Mmp10" "Ramp3" "Spib" "Flrt3" "H2-DMb1"
## [229] "Car2" "Mef2c" "Prdx1" "Gprin3" "Prok2" "Rrad"
## [235] "Cybb" "Prg2" "Palld" "Nppc" "Olfm4" "Krt19"
## [241] "Ms4a8a" "Msc" "S100a8" "Mmp25" "Tspan3" "Adig"
## [247] "Ccl3" "Asgr2" "Nr4a3" "Stab2" "Rora" "Creld2"
## [253] "Nrg1" "Selenoh" "Bpifa1" "Stfa2l1" "Rnase6" "Ocstamp"
## [259] "Prc1" "Ctsg" "Pla2g2d" "Cxcl5" "Nupr1" "Hilpda"
## [265] "Dnase1l3" "Cd83" "Wfdc17" "Fggy" "Krt8" "Gatm"
## [271] "Adamts9" "Egr4" "Ccna2" "Rgcc" "Tmem176a" "Olr1"
## [277] "Cxcr6" "Cldn1" "Cdh1" "Hes1" "Olfr889" "Ptcra"
## [283] "Tnfrsf9" "Tnfsf11" "Scd1" "Lipa" "Cbfa2t3" "Tgm2"
## [289] "Bank1" "Timp3" "Fxyd3" "Vcan" "Cmpk2" "Lad1"
## [295] "Aif1" "Tm4sf5" "Lipc" "Plac8" "Icos" "Clec10a"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)
DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)
ElbowPlot(GEX.seur,ndims = 50)
PCs <- 1:32
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph', resolution = 2.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 21447
## Number of edges: 889971
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7962
## Number of communities: 39
## Elapsed time: 4 seconds
GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 133)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:43:51 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:43:51 Read 21447 rows and found 32 numeric columns
## 11:43:51 Using Annoy for neighbor search, n_neighbors = 20
## 11:43:51 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:43:54 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpSK3T0e\filead98213e264d
## 11:43:54 Searching Annoy index using 1 thread, search_k = 2000
## 11:43:59 Annoy recall = 100%
## 11:44:00 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 11:44:01 Initializing from normalized Laplacian + noise (using irlba)
## 11:44:02 Commencing optimization for 200 epochs, with 651750 positive edges
## 11:44:24 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))
GEX.seur$cnt <- factor(gsub("1$|2$|3$|4$|5$","",as.character(GEX.seur$FB.info)),
levels = c("LUNG.CTL","LUNG.CKO",
"BALF.CTL","BALF.CKO"))
DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "FB.info", split.by = "FB.info", ncol = 4,
cols = color.FB)
DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "cnt", split.by = "cnt", ncol = 2,
cols = color.cnt)
library(DoubletFinder)
## 载入需要的程辑包:KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 载入需要的程辑包:ROCR
## NULL
## [1] "Creating 7149 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 7149 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")
# directly using prevously sorted markers
markers.preAnno <- list(CC=c("Pcna","Top2a","Mcm6","Mki67","Ptprc"),
Bcell=c("Cd19","Cd79a","Ms4a1","Ighd"),
Plasma=c("Ighm","Mzb1","Jchain","Sdc1"),
CD8T=c("Cd3d","Cd3e","Cd8a","Cd8b1","Sell"),
Treg=c("Foxp3"),
gdT=c("Trac","Tcrg-C1"),
NKs=c("Klrb1c","Ncr1","Eomes","Klrd1","Klre1","Prf1","Gzma","Gzmb","Ccl5","Nkg7","Serpinb6b","Serpinb9"),
#ILC2_CC=c(""),
ILC2=c("Gata3","Il1rl1","Ramp1","Areg","Calca","Stab2","Il5","Hilpda"),
BAS=c("Il6","Gata2","Cpa3","Ms4a2","Mcpt8","Cd200r3","Cyp11a1"),
EOS=c("Siglecf","Ear6","Ccr3","Ldhc"),
NEU1=c("Retnlg","Mmp8","Hp"),
NEU2=c("Cxcl3","Il1a","Tnf"),
NEU3=c("Cstb","Ccl4"),
#NEU4=c(""),
NEU5=c("Ifit1","Isg20","Stat1"),
pDC=c("Siglech","Tcf4","Ccr9","Klk1","Cox6a2"),
cDCs=c("Cd209a","Cd209d","Cd209e","Itgae","H2-Ab1","H2-Aa","H2-Eb1"),
migDC=c("Ccr7","Fscn1","Ccl22","Ccl17","Aldh1a2","Pkib","Il4i1","Batf3"),
Monocyte=c("Clec4a1","Plac8","Lyz2","S100a4","Cx3cr1","Ace","Adgre4","Ifitm6","F13a1","Ly6c1","Ly6c2"),
IM1=c("Arg1","Anpep","Lgals3","Sgk1","Spp1","Mmp19","Ccl7","Ccl2","Hal","Gpnmb"),
IM2=c("Retnla","Ccl24","Ece1"),
AM_CC=c("S100a1","Abcg1","Mgll","Chil3","Lpl","Lipa","Vat1","F7","Anxa4"),
AM1=c("Mt2","Cxcl1","Inhba","Cd14")
#AM2=c("")
)
as.vector(unlist(markers.preAnno))
## [1] "Pcna" "Top2a" "Mcm6" "Mki67" "Ptprc" "Cd19"
## [7] "Cd79a" "Ms4a1" "Ighd" "Ighm" "Mzb1" "Jchain"
## [13] "Sdc1" "Cd3d" "Cd3e" "Cd8a" "Cd8b1" "Sell"
## [19] "Foxp3" "Trac" "Tcrg-C1" "Klrb1c" "Ncr1" "Eomes"
## [25] "Klrd1" "Klre1" "Prf1" "Gzma" "Gzmb" "Ccl5"
## [31] "Nkg7" "Serpinb6b" "Serpinb9" "Gata3" "Il1rl1" "Ramp1"
## [37] "Areg" "Calca" "Stab2" "Il5" "Hilpda" "Il6"
## [43] "Gata2" "Cpa3" "Ms4a2" "Mcpt8" "Cd200r3" "Cyp11a1"
## [49] "Siglecf" "Ear6" "Ccr3" "Ldhc" "Retnlg" "Mmp8"
## [55] "Hp" "Cxcl3" "Il1a" "Tnf" "Cstb" "Ccl4"
## [61] "Ifit1" "Isg20" "Stat1" "Siglech" "Tcf4" "Ccr9"
## [67] "Klk1" "Cox6a2" "Cd209a" "Cd209d" "Cd209e" "Itgae"
## [73] "H2-Ab1" "H2-Aa" "H2-Eb1" "Ccr7" "Fscn1" "Ccl22"
## [79] "Ccl17" "Aldh1a2" "Pkib" "Il4i1" "Batf3" "Clec4a1"
## [85] "Plac8" "Lyz2" "S100a4" "Cx3cr1" "Ace" "Adgre4"
## [91] "Ifitm6" "F13a1" "Ly6c1" "Ly6c2" "Arg1" "Anpep"
## [97] "Lgals3" "Sgk1" "Spp1" "Mmp19" "Ccl7" "Ccl2"
## [103] "Hal" "Gpnmb" "Retnla" "Ccl24" "Ece1" "S100a1"
## [109] "Abcg1" "Mgll" "Chil3" "Lpl" "Lipa" "Vat1"
## [115] "F7" "Anxa4" "Mt2" "Cxcl1" "Inhba" "Cd14"
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[1:60]), group.by = "seurat_clusters") + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[61:120]), group.by = "seurat_clusters") + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
levels = c(34, # Bcell
35, # Plasma
20,21,32,27, # Tcell
19,0, # NKs
38, # CC mix
9,16,33, # ILC2
37, # BAS
14, # EOS
13,1,11,12,17,6,4,3,5,2,8,31, # NEU
29, # pDC
30, # cDCs
36, # CC mix
26, # migDC
24,10, # Monocyte
23,18,15, # IMs
25,7,22,28 # AMs
))
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[1:60]), group.by = "sort_clusters") + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[61:120]), group.by = "sort_clusters") + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
ncol = 2,
pt.size = 0, group.by = "sort_clusters") &
geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
ncol = 2,
pt.size = 0, group.by = "sort_clusters")
# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"
#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
# test.use = "MAST",
# logfc.threshold = 0.25)
GEX.markers.pre <- read.table("sc10x_RZB.markers.pre20240408.csv", header = TRUE, sep = ",")
#GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
levels = levels(GEX.seur$sort_clusters))
markers.pre_t60 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
top_n(n = 60, wt = avg_log2FC) %>%
filter(p_val_adj < 0.01) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
markers.pre_t120 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
top_n(n = 120, wt = avg_log2FC) %>%
filter(p_val_adj < 0.01) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
ttt = 1159
ttt/60
## [1] 19.31667
ttt/64
## [1] 18.10938
ttt/65
## [1] 17.83077
ttt = 1977
ttt/60
## [1] 32.95
ttt/64
## [1] 30.89062
ttt/65
## [1] 30.41538
pp.t60 <- list()
for(i in 1:18){
pp.t60[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t60[(65*i-64):(65*i)])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}
pp.t60
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
pp.t120 <- list()
for(i in 1:33){
pp.t120[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t120[(60*i-59):(60*i)])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}
#pp.t120
#marker.GSE127465 <- list(Mono1=c("Vcan","Ly6c1","Ly6c2"),
# Mono2=c("Cd300e","Itgal","Ace","Fcgr4"),
# Mono3=c("Il1b","S100a9","S100a8"),
# )
marker.GSE12746 <- list(NK=c("Gzma","Klrb1c","Ncr1","Klra4","Eomes","Gzmb","Fasl"),
Tcell=c("Cd3d","Cd3e","Cd3g","Cd28"),
Bcell=c("Cd79a","Fcmr","Cd79b","Cd19","Fcer2a","Pax5","Cd22"),
Basophil=c("Il6","Gata2","Cpa3","Ms4a2","Fcer1a"),
Neutrophil=c("S100a9","S100a8","Mmp9","Csf3r","Mmp8","Il1rn","Cxcr2"),
Mono1=c("Vcan","Ly6c1","Ly6c2"),
Mono2=c("Cd300e","Itgal","Ace","Fcgr4"),
Mono3=c("Il1b","S100a9","S100a8","Csf1r","Cd14"),
#MoMDC=c("Lyz1","Csf1r","Cd300e","Mafb","Batf3","Krt79","Msr1"),
mpDC=c("Siglech","Ccr9","Bst2","Pacsin1","Tcf4","Bcl11a","Runx2"),
mDC1=c("Xcr1","Cadm1","Cd36","Itgae","Cd24a"),
mDC2=c("Itgax","H2-DMb2","Csf1r"),
mDC3=c("Fscn1","Ccr7","Ccl22","Tnfrsf9","Sema7a","Stat4","Il12b")
)
DotPlot(GEX.seur, features = rev(unique(as.vector(unlist(marker.GSE12746)))), group.by = "sort_clusters") + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
check markers from Gibbings2017 which’s fig.2 defined signatures of AM and IM1-3
markers.Gibbings2017 <- list(Mac.genes=c("Mertk","Fcgr1","Adgre1","Cd68","Lyz2","Lamp1","Cd36"), # Emr1: Adgre1
DC.genes=c("Ccr7","Dpp4","Zbtb46","Batf3","Irf4","Itgam","Itgax"), # 4 more added, Batf3, Irf4, CD11b, CD11c
Mono.genes=c("Mafb","Maf","Cx3cr1","Itgam","Cd14","Cd163","Csf1r","Fcgr3"),
AM.genes=c("Marco","Siglecf","Car4","Csf2ra","Csf2rb"),
M1.genes=c("H2-Ab1","H2-Aa","Cd86","Tlr2","Tlr4","Socs3","Tnf","Il1b","Il18"),
M2.genes=c("Chil3","Chil4","Tgm2","Retnla","Mrc1","Il10"), # no Clec7a expressed
Inflam=c("Lyve1","C1qb","C1qa","C1qc","C4b","Fcna","Colec12","Ifnar1","Ifnar2","Ifngr1","Ifngr2",
"Il4ra","Il6ra","Il10ra","Il10rb","Il21r","Il12rb2","Il17ra"),
Chemo.lr=c("Ccl12","Ccl2","Ccl24","Ccl3","Ccl4","Ccl6","Ccl7","Ccl8","Ccl9","Cxcl13","Cxcl14","Cxcl16","Cxcl2","Ccr1","Ccr2","Ccr5")
)
DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Mac.genes,markers.Gibbings2017$DC.genes)), scale = FALSE) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Mono.genes,markers.Gibbings2017$AM.genes)), scale = TRUE) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$M1.genes,markers.Gibbings2017$M2.genes)), scale = TRUE) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Inflam)), scale = TRUE) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Chemo.lr)), scale = TRUE) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
GEX.seur$SP.info <- factor(as.character(GEX.seur$FB.info),
levels = levels(GEX.seur$FB.info)[3:10])
stat.SP <- table(GEX.seur@meta.data[,c("SP.info","sort_clusters")])
stat.SP
## sort_clusters
## SP.info 34 35 20 21 32 27 19 0 38 9 16 33 37 14 13 1 11
## LUNG.CTL1 28 16 44 43 24 26 94 415 12 268 175 15 9 49 120 295 79
## LUNG.CTL2 34 34 60 40 45 12 82 383 10 179 108 14 13 67 157 241 123
## LUNG.CKO1 39 43 140 130 34 46 89 309 18 100 96 3 7 55 157 311 160
## LUNG.CKO2 34 23 182 67 49 22 91 382 19 101 128 6 12 70 141 306 151
## BALF.CTL1 2 0 7 44 16 48 39 14 1 53 8 74 4 43 10 31 39
## BALF.CTL2 0 0 1 35 16 11 40 8 1 36 6 40 5 120 14 54 27
## BALF.CKO1 1 0 3 33 6 121 32 8 1 11 2 10 4 41 14 24 50
## BALF.CKO2 1 0 1 26 13 14 27 7 1 28 7 25 9 96 26 56 59
## sort_clusters
## SP.info 12 17 6 4 3 5 2 8 31 29 30 36 26 24 10 23 18
## LUNG.CTL1 17 45 12 1 11 7 14 8 3 58 58 29 57 63 127 100 110
## LUNG.CTL2 11 61 11 9 16 10 8 13 2 64 70 20 29 74 181 97 104
## LUNG.CKO1 17 139 17 13 19 25 17 27 5 44 46 10 31 95 184 49 137
## LUNG.CKO2 29 63 19 6 7 10 5 20 3 89 64 15 35 114 222 81 106
## BALF.CTL1 100 43 189 293 229 256 309 196 45 12 1 1 68 0 8 19 10
## BALF.CTL2 183 30 231 229 289 202 277 160 42 10 3 1 36 0 4 10 12
## BALF.CKO1 126 89 208 320 360 350 422 233 82 9 2 0 17 1 0 5 10
## BALF.CKO2 158 28 295 241 253 192 192 144 40 4 2 2 28 0 4 14 9
## sort_clusters
## SP.info 15 25 7 22 28
## LUNG.CTL1 58 43 28 146 43
## LUNG.CTL2 51 24 16 84 6
## LUNG.CKO1 55 26 10 86 18
## LUNG.CKO2 52 18 5 86 17
## BALF.CTL1 82 95 222 1 94
## BALF.CTL2 68 32 129 2 9
## BALF.CKO1 61 32 164 0 50
## BALF.CKO2 104 45 284 1 60
round(rowRatio(stat.SP),3)
## sort_clusters
## SP.info 34 35 20 21 32 27 19 0 38 9 16
## LUNG.CTL1 0.010 0.006 0.016 0.016 0.009 0.009 0.034 0.151 0.004 0.097 0.064
## LUNG.CTL2 0.013 0.013 0.023 0.016 0.018 0.005 0.032 0.149 0.004 0.070 0.042
## LUNG.CKO1 0.014 0.015 0.050 0.046 0.012 0.016 0.032 0.110 0.006 0.036 0.034
## LUNG.CKO2 0.012 0.008 0.064 0.024 0.017 0.008 0.032 0.134 0.007 0.035 0.045
## BALF.CTL1 0.001 0.000 0.003 0.016 0.006 0.018 0.014 0.005 0.000 0.020 0.003
## BALF.CTL2 0.000 0.000 0.000 0.015 0.007 0.005 0.017 0.003 0.000 0.015 0.003
## BALF.CKO1 0.000 0.000 0.001 0.011 0.002 0.042 0.011 0.003 0.000 0.004 0.001
## BALF.CKO2 0.000 0.000 0.000 0.010 0.005 0.006 0.011 0.003 0.000 0.011 0.003
## sort_clusters
## SP.info 33 37 14 13 1 11 12 17 6 4 3
## LUNG.CTL1 0.005 0.003 0.018 0.044 0.107 0.029 0.006 0.016 0.004 0.000 0.004
## LUNG.CTL2 0.005 0.005 0.026 0.061 0.094 0.048 0.004 0.024 0.004 0.004 0.006
## LUNG.CKO1 0.001 0.002 0.020 0.056 0.111 0.057 0.006 0.050 0.006 0.005 0.007
## LUNG.CKO2 0.002 0.004 0.025 0.049 0.107 0.053 0.010 0.022 0.007 0.002 0.002
## BALF.CTL1 0.027 0.001 0.016 0.004 0.011 0.014 0.037 0.016 0.070 0.108 0.085
## BALF.CTL2 0.017 0.002 0.051 0.006 0.023 0.011 0.077 0.013 0.097 0.097 0.122
## BALF.CKO1 0.003 0.001 0.014 0.005 0.008 0.017 0.043 0.031 0.072 0.110 0.124
## BALF.CKO2 0.010 0.004 0.038 0.010 0.022 0.024 0.063 0.011 0.118 0.097 0.101
## sort_clusters
## SP.info 5 2 8 31 29 30 36 26 24 10 23
## LUNG.CTL1 0.003 0.005 0.003 0.001 0.021 0.021 0.011 0.021 0.023 0.046 0.036
## LUNG.CTL2 0.004 0.003 0.005 0.001 0.025 0.027 0.008 0.011 0.029 0.071 0.038
## LUNG.CKO1 0.009 0.006 0.010 0.002 0.016 0.016 0.004 0.011 0.034 0.066 0.017
## LUNG.CKO2 0.004 0.002 0.007 0.001 0.031 0.022 0.005 0.012 0.040 0.078 0.028
## BALF.CTL1 0.095 0.114 0.072 0.017 0.004 0.000 0.000 0.025 0.000 0.003 0.007
## BALF.CTL2 0.085 0.117 0.067 0.018 0.004 0.001 0.000 0.015 0.000 0.002 0.004
## BALF.CKO1 0.121 0.145 0.080 0.028 0.003 0.001 0.000 0.006 0.000 0.000 0.002
## BALF.CKO2 0.077 0.077 0.058 0.016 0.002 0.001 0.001 0.011 0.000 0.002 0.006
## sort_clusters
## SP.info 18 15 25 7 22 28
## LUNG.CTL1 0.040 0.021 0.016 0.010 0.053 0.016
## LUNG.CTL2 0.041 0.020 0.009 0.006 0.033 0.002
## LUNG.CKO1 0.049 0.020 0.009 0.004 0.031 0.006
## LUNG.CKO2 0.037 0.018 0.006 0.002 0.030 0.006
## BALF.CTL1 0.004 0.030 0.035 0.082 0.000 0.035
## BALF.CTL2 0.005 0.029 0.013 0.054 0.001 0.004
## BALF.CKO1 0.003 0.021 0.011 0.057 0.000 0.017
## BALF.CKO2 0.004 0.042 0.018 0.114 0.000 0.024
melt.SP <- reshape2::melt(stat.SP)
melt.SP$sort_clusters <- factor(as.character(melt.SP$sort_clusters),
levels = rev(levels(GEX.seur$sort_clusters)))
color.clusters <- scales::hue_pal()(39)
names(color.clusters) <- as.character(0:38)
p.SP <- ggplot(melt.SP, aes(SP.info, weight=value, fill=sort_clusters)) +
geom_bar(color="black", width = .7, position = "fill") +
labs( y = "Proportion of total (%)",title = "",x="") +
scale_fill_manual(values = color.clusters[rev(levels(GEX.seur$sort_clusters))]) +
scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
axis.text.y = element_text(size = 16))+
guides(fill = guide_legend(reverse = T, title = ""))
p.SP
GEX.seur$preAnno1 <- as.character(GEX.seur$seurat_clusters)
# Bcell
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(34)] <- "Bcell"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(35)] <- "Plasma"
# Tcell
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(20)] <- "CD8T"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(21)] <- "CD4T"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(32)] <- "Treg"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(27)] <- "Tin" # innate-like T
# NK
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(19)] <- "NK1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(0)] <- "NK2"
# ILC2
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(9)] <- "ILC2.CC"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(16)] <- "ILC2.1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(33)] <- "ILC2.2"
# Basophil
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(37)] <- "BAS"
# Eosinophil
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(14)] <- "EOS"
# Neutrophil
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(13,1)] <- "NEU1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(11,12)] <- "NEU2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(6,4,3)] <- "NEU3"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(5,17)] <- "NEU4"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(2,8,31)] <- "NEU5"
# Siglech+
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(29)] <- "pDC"
#
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(30)] <- "cDC"
# Ccr7+ Fscn1+
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(26)] <- "migDC"
# Monocyte, half MHCII+ half Ly6c+
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(24)] <- "Mono1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(10)] <- "Mono2"
# Interstitial Macrophages
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(23)] <- "IM1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(18)] <- "IM2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(15)] <- "IM3"
# Alveolar Macrophages
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(25)] <- "AM.CC"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(7)] <- "AM1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(22)] <- "AM2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(28)] <- "AM3"
# cyclijng mix
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(38)] <- "CC.mix1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(36)] <- "CC.mix2"
GEX.seur$preAnno1 <- factor(GEX.seur$preAnno1,
levels = c("Bcell","Plasma",
"CD8T","CD4T","Treg","Tin",
"NK1","NK2",
"ILC2.CC","ILC2.1","ILC2.2",
"BAS","EOS",
paste0("NEU",1:5),
"pDC","cDC","migDC",
"Mono1","Mono2","IM1","IM2","IM3",
"AM.CC","AM1","AM2","AM3",
"CC.mix1","CC.mix2"))
GEX.seur$preAnno2 <- as.character(GEX.seur$seurat_clusters)
# Bcell
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(34,35)] <- "Bcell"
# Tcell
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(20,21,32,27)] <- "Tcell"
# NK
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(19,0)] <- "NK"
# ILC2
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(9,16,33)] <- "ILC2"
# Basophil
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(37)] <- "BAS"
# Eosinophil
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(14)] <- "EOS"
# Neutrophil
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(13,1,11,12,6,4,3,5,17,2,8,31)] <- "NEU"
# Siglech+
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(29)] <- "pDC"
#
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(30)] <- "cDC"
# Ccr7+ Fscn1+
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(26)] <- "migDC"
# Monocyte, half MHCII+ half Ly6c+
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(24,10)] <- "Monocyte"
# Interstitial Macrophages
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(23,18,15)] <- "IM"
# Alveolar Macrophages
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(25,7,22,28)] <- "AM"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(38,36)] <- "MIX"
GEX.seur$preAnno2 <- factor(GEX.seur$preAnno2,
levels = c("Bcell","Tcell",
"NK",
"ILC2",
"BAS","EOS",
"NEU",
"pDC","cDC","migDC",
"Monocyte","IM",
"AM","MIX"))
ggsci::pal_igv("default")(40)
## [1] "#5050FFFF" "#CE3D32FF" "#749B58FF" "#F0E685FF" "#466983FF" "#BA6338FF"
## [7] "#5DB1DDFF" "#802268FF" "#6BD76BFF" "#D595A7FF" "#924822FF" "#837B8DFF"
## [13] "#C75127FF" "#D58F5CFF" "#7A65A5FF" "#E4AF69FF" "#3B1B53FF" "#CDDEB7FF"
## [19] "#612A79FF" "#AE1F63FF" "#E7C76FFF" "#5A655EFF" "#CC9900FF" "#99CC00FF"
## [25] "#A9A9A9FF" "#CC9900FF" "#99CC00FF" "#33CC00FF" "#00CC33FF" "#00CC99FF"
## [31] "#0099CCFF" "#0A47FFFF" "#4775FFFF" "#FFC20AFF" "#FFD147FF" "#990033FF"
## [37] "#991A00FF" "#996600FF" "#809900FF" "#339900FF"
scales::show_col(ggsci::pal_igv("default")(40))
color.pre1 <- c(scales::hue_pal()(32),"grey","darkgrey")
color.pre2 <- c(ggsci::pal_igv("default")(40)[c(2,3,6,7,8,10,12,22,18:19,38,21,30)],"darkgrey")
scales::show_col(color.pre1)
scales::show_col(color.pre2)
(DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno1", label = T, label.size = 3.5,repel = T,
cols =color.pre1) + NoLegend()) +
(DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno2", label = T, label.size = 3.5,repel = T,
cols =color.pre2) + NoLegend())
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[1:60]), group.by = "preAnno1") + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[61:120]), group.by = "preAnno1") + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "preAnno1", split.by = "cnt", ncol = 2,
cols = color.pre1)
DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "preAnno2", split.by = "cnt", ncol = 2,
cols = color.pre2)
#saveRDS(GEX.seur,"sc10x_RZB.preAnno.rds")
GEX.seur
## An object of class Seurat
## 18826 features across 21447 samples within 1 assay
## Active assay: RNA (18826 features, 1265 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
GEX.pure <- subset(GEX.seur, subset = preAnno2 != "MIX")
GEX.pure
## An object of class Seurat
## 18826 features across 21306 samples within 1 assay
## Active assay: RNA (18826 features, 1265 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
GEX.pure$preAnno1 <- factor(as.character(GEX.pure$preAnno1),
levels = c("Bcell","Plasma",
"CD8T","CD4T","Treg","Tin",
"NK1","NK2",
"ILC2.CC","ILC2.1","ILC2.2",
"BAS","EOS",
paste0("NEU",1:5),
"pDC","cDC","migDC",
"Mono1","Mono2","IM1","IM2","IM3",
"AM.CC","AM1","AM2","AM3"))
GEX.pure$preAnno2 <- factor(as.character(GEX.pure$preAnno2),
levels = c("Bcell","Tcell",
"NK",
"ILC2",
"BAS","EOS",
"NEU",
"pDC","cDC","migDC",
"Monocyte","IM",
"AM"))
DimPlot(GEX.pure, reduction = "umap", label = F,group.by = "preAnno2", split.by = "cnt", ncol = 2,
cols = color.pre2)
stat.SP <- table(GEX.pure@meta.data[,c("SP.info","preAnno1")])
#rownames(stat.SP)[4] <- c("M22+")
stat.SP
## preAnno1
## SP.info Bcell Plasma CD8T CD4T Treg Tin NK1 NK2 ILC2.CC ILC2.1 ILC2.2 BAS
## LUNG.CTL1 28 16 44 43 24 26 94 415 268 175 15 9
## LUNG.CTL2 34 34 60 40 45 12 82 383 179 108 14 13
## LUNG.CKO1 39 43 140 130 34 46 89 309 100 96 3 7
## LUNG.CKO2 34 23 182 67 49 22 91 382 101 128 6 12
## BALF.CTL1 2 0 7 44 16 48 39 14 53 8 74 4
## BALF.CTL2 0 0 1 35 16 11 40 8 36 6 40 5
## BALF.CKO1 1 0 3 33 6 121 32 8 11 2 10 4
## BALF.CKO2 1 0 1 26 13 14 27 7 28 7 25 9
## preAnno1
## SP.info EOS NEU1 NEU2 NEU3 NEU4 NEU5 pDC cDC migDC Mono1 Mono2 IM1 IM2 IM3
## LUNG.CTL1 49 415 96 24 52 25 58 58 57 63 127 100 110 58
## LUNG.CTL2 67 398 134 36 71 23 64 70 29 74 181 97 104 51
## LUNG.CKO1 55 468 177 49 164 49 44 46 31 95 184 49 137 55
## LUNG.CKO2 70 447 180 32 73 28 89 64 35 114 222 81 106 52
## BALF.CTL1 43 41 139 711 299 550 12 1 68 0 8 19 10 82
## BALF.CTL2 120 68 210 749 232 479 10 3 36 0 4 10 12 68
## BALF.CKO1 41 38 176 888 439 737 9 2 17 1 0 5 10 61
## BALF.CKO2 96 82 217 789 220 376 4 2 28 0 4 14 9 104
## preAnno1
## SP.info AM.CC AM1 AM2 AM3
## LUNG.CTL1 43 28 146 43
## LUNG.CTL2 24 16 84 6
## LUNG.CKO1 26 10 86 18
## LUNG.CKO2 18 5 86 17
## BALF.CTL1 95 222 1 94
## BALF.CTL2 32 129 2 9
## BALF.CKO1 32 164 0 50
## BALF.CKO2 45 284 1 60
round(rowRatio(stat.SP),3)
## preAnno1
## SP.info Bcell Plasma CD8T CD4T Treg Tin NK1 NK2 ILC2.CC ILC2.1
## LUNG.CTL1 0.010 0.006 0.016 0.016 0.009 0.010 0.035 0.153 0.099 0.065
## LUNG.CTL2 0.013 0.013 0.024 0.016 0.018 0.005 0.032 0.151 0.071 0.043
## LUNG.CKO1 0.014 0.015 0.050 0.047 0.012 0.017 0.032 0.111 0.036 0.035
## LUNG.CKO2 0.012 0.008 0.065 0.024 0.017 0.008 0.032 0.136 0.036 0.045
## BALF.CTL1 0.001 0.000 0.003 0.016 0.006 0.018 0.014 0.005 0.020 0.003
## BALF.CTL2 0.000 0.000 0.000 0.015 0.007 0.005 0.017 0.003 0.015 0.003
## BALF.CKO1 0.000 0.000 0.001 0.011 0.002 0.042 0.011 0.003 0.004 0.001
## BALF.CKO2 0.000 0.000 0.000 0.010 0.005 0.006 0.011 0.003 0.011 0.003
## preAnno1
## SP.info ILC2.2 BAS EOS NEU1 NEU2 NEU3 NEU4 NEU5 pDC cDC migDC
## LUNG.CTL1 0.006 0.003 0.018 0.153 0.035 0.009 0.019 0.009 0.021 0.021 0.021
## LUNG.CTL2 0.006 0.005 0.026 0.157 0.053 0.014 0.028 0.009 0.025 0.028 0.011
## LUNG.CKO1 0.001 0.003 0.020 0.168 0.064 0.018 0.059 0.018 0.016 0.017 0.011
## LUNG.CKO2 0.002 0.004 0.025 0.159 0.064 0.011 0.026 0.010 0.032 0.023 0.012
## BALF.CTL1 0.027 0.001 0.016 0.015 0.051 0.263 0.111 0.203 0.004 0.000 0.025
## BALF.CTL2 0.017 0.002 0.051 0.029 0.089 0.316 0.098 0.202 0.004 0.001 0.015
## BALF.CKO1 0.003 0.001 0.014 0.013 0.061 0.306 0.151 0.254 0.003 0.001 0.006
## BALF.CKO2 0.010 0.004 0.039 0.033 0.087 0.316 0.088 0.151 0.002 0.001 0.011
## preAnno1
## SP.info Mono1 Mono2 IM1 IM2 IM3 AM.CC AM1 AM2 AM3
## LUNG.CTL1 0.023 0.047 0.037 0.041 0.021 0.016 0.010 0.054 0.016
## LUNG.CTL2 0.029 0.071 0.038 0.041 0.020 0.009 0.006 0.033 0.002
## LUNG.CKO1 0.034 0.066 0.018 0.049 0.020 0.009 0.004 0.031 0.006
## LUNG.CKO2 0.040 0.079 0.029 0.038 0.018 0.006 0.002 0.031 0.006
## BALF.CTL1 0.000 0.003 0.007 0.004 0.030 0.035 0.082 0.000 0.035
## BALF.CTL2 0.000 0.002 0.004 0.005 0.029 0.013 0.054 0.001 0.004
## BALF.CKO1 0.000 0.000 0.002 0.003 0.021 0.011 0.057 0.000 0.017
## BALF.CKO2 0.000 0.002 0.006 0.004 0.042 0.018 0.114 0.000 0.024
melt.SP <- reshape2::melt(stat.SP)
melt.SP$preAnno1 <- factor(as.character(melt.SP$preAnno1),
levels = rev(levels(GEX.pure$preAnno1)))
color.stat1 <- color.pre1
names(color.stat1) <- levels(GEX.pure$preAnno1)
p.SP <- ggplot(melt.SP, aes(SP.info, weight=value, fill=preAnno1)) +
geom_bar(color="black", width = .7, position = "fill") +
labs( y = "Proportion of total (%)",title = "",x="") +
scale_fill_manual(values = color.stat1[rev(levels(GEX.pure$preAnno1))]) +
scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
axis.text.y = element_text(size = 16))+
guides(fill = guide_legend(reverse = T, title = ""))
p.SP
stat.SP <- table(GEX.pure@meta.data[,c("SP.info","preAnno2")])
#rownames(stat.SP)[4] <- c("M22+")
stat.SP
## preAnno2
## SP.info Bcell Tcell NK ILC2 BAS EOS NEU pDC cDC migDC Monocyte IM
## LUNG.CTL1 44 137 509 458 9 49 612 58 58 57 190 268
## LUNG.CTL2 68 157 465 301 13 67 662 64 70 29 255 252
## LUNG.CKO1 82 350 398 199 7 55 907 44 46 31 279 241
## LUNG.CKO2 57 320 473 235 12 70 760 89 64 35 336 239
## BALF.CTL1 2 115 53 135 4 43 1740 12 1 68 8 111
## BALF.CTL2 0 63 48 82 5 120 1738 10 3 36 4 90
## BALF.CKO1 1 163 40 23 4 41 2278 9 2 17 1 76
## BALF.CKO2 1 54 34 60 9 96 1684 4 2 28 4 127
## preAnno2
## SP.info AM
## LUNG.CTL1 260
## LUNG.CTL2 130
## LUNG.CKO1 140
## LUNG.CKO2 126
## BALF.CTL1 412
## BALF.CTL2 172
## BALF.CKO1 246
## BALF.CKO2 390
round(rowRatio(stat.SP),3)
## preAnno2
## SP.info Bcell Tcell NK ILC2 BAS EOS NEU pDC cDC migDC
## LUNG.CTL1 0.016 0.051 0.188 0.169 0.003 0.018 0.226 0.021 0.021 0.021
## LUNG.CTL2 0.027 0.062 0.184 0.119 0.005 0.026 0.261 0.025 0.028 0.011
## LUNG.CKO1 0.030 0.126 0.143 0.072 0.003 0.020 0.326 0.016 0.017 0.011
## LUNG.CKO2 0.020 0.114 0.168 0.083 0.004 0.025 0.270 0.032 0.023 0.012
## BALF.CTL1 0.001 0.043 0.020 0.050 0.001 0.016 0.643 0.004 0.000 0.025
## BALF.CTL2 0.000 0.027 0.020 0.035 0.002 0.051 0.733 0.004 0.001 0.015
## BALF.CKO1 0.000 0.056 0.014 0.008 0.001 0.014 0.785 0.003 0.001 0.006
## BALF.CKO2 0.000 0.022 0.014 0.024 0.004 0.039 0.675 0.002 0.001 0.011
## preAnno2
## SP.info Monocyte IM AM
## LUNG.CTL1 0.070 0.099 0.096
## LUNG.CTL2 0.101 0.099 0.051
## LUNG.CKO1 0.100 0.087 0.050
## LUNG.CKO2 0.119 0.085 0.045
## BALF.CTL1 0.003 0.041 0.152
## BALF.CTL2 0.002 0.038 0.073
## BALF.CKO1 0.000 0.026 0.085
## BALF.CKO2 0.002 0.051 0.156
melt.SP <- reshape2::melt(stat.SP)
melt.SP$preAnno2 <- factor(as.character(melt.SP$preAnno2),
levels = rev(levels(GEX.pure$preAnno2)))
color.stat2 <- color.pre2
names(color.stat2) <- levels(GEX.pure$preAnno2)
p.SP <- ggplot(melt.SP, aes(SP.info, weight=value, fill=preAnno2)) +
geom_bar(color="black", width = .7, position = "fill") +
labs( y = "Proportion of total (%)",title = "",x="") +
scale_fill_manual(values = color.stat2[rev(levels(GEX.pure$preAnno2))]) +
scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
axis.text.y = element_text(size = 16))+
guides(fill = guide_legend(reverse = T, title = ""))
p.SP
stat.cnt <- table(GEX.pure@meta.data[,c("cnt","preAnno2")])
stat.cnt
## preAnno2
## cnt Bcell Tcell NK ILC2 BAS EOS NEU pDC cDC migDC Monocyte IM
## LUNG.CTL 112 294 974 759 22 116 1274 122 128 86 445 520
## LUNG.CKO 139 670 871 434 19 125 1667 133 110 66 615 480
## BALF.CTL 2 178 101 217 9 163 3478 22 4 104 12 201
## BALF.CKO 2 217 74 83 13 137 3962 13 4 45 5 203
## preAnno2
## cnt AM
## LUNG.CTL 390
## LUNG.CKO 266
## BALF.CTL 584
## BALF.CKO 636
round(rowRatio(stat.cnt),3)
## preAnno2
## cnt Bcell Tcell NK ILC2 BAS EOS NEU pDC cDC migDC Monocyte
## LUNG.CTL 0.021 0.056 0.186 0.145 0.004 0.022 0.243 0.023 0.024 0.016 0.085
## LUNG.CKO 0.025 0.120 0.156 0.078 0.003 0.022 0.298 0.024 0.020 0.012 0.110
## BALF.CTL 0.000 0.035 0.020 0.043 0.002 0.032 0.685 0.004 0.001 0.020 0.002
## BALF.CKO 0.000 0.040 0.014 0.015 0.002 0.025 0.735 0.002 0.001 0.008 0.001
## preAnno2
## cnt IM AM
## LUNG.CTL 0.099 0.074
## LUNG.CKO 0.086 0.048
## BALF.CTL 0.040 0.115
## BALF.CKO 0.038 0.118
melt.cnt <- reshape2::melt(stat.cnt)
melt.cnt$preAnno2 <- factor(as.character(melt.cnt$preAnno2),
levels = rev(levels(GEX.pure$preAnno2)))
color.stat2 <- color.pre2
names(color.stat2) <- levels(GEX.pure$preAnno2)
p.cnt <- ggplot(melt.cnt, aes(cnt, weight=value, fill=preAnno2)) +
geom_bar(color="black", width = .7, position = "fill") +
labs( y = "Proportion of total (%)",title = "",x="") +
scale_fill_manual(values = color.stat2[rev(levels(GEX.pure$preAnno2))]) +
scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
axis.text.y = element_text(size = 16))+
guides(fill = guide_legend(reverse = T, title = ""))
p.cnt
GEX.pure$rep <- paste0("rep",
gsub("LUNG.CTL|LUNG.CKO|BALF.CTL|BALF.CKO","",as.character(GEX.pure$FB.info)))
pumap1 <- DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno2", cols = color.pre2) +
DimPlot(GEX.pure, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
pumap1
pumap2 <- DimPlot(GEX.pure, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info",
ncol = 4, cols = color.FB)
pumap2
pumap3 <- DimPlot(GEX.pure, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "preAnno2", split.by = "FB.info",
ncol = 4, cols = color.pre2)
pumap3
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$SP.info,
clusters=GEX.pure$preAnno2),
main = "Cell Count",
gaps_row = c(2,4,6),
#gaps_col = c(4,7,10),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$SP.info,
clusters=GEX.pure$preAnno2)),
main = "Cell Ratio",
gaps_row = c(2,4,6),
#gaps_col = c(4,7,10),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
stat_preAnno2 <- GEX.pure@meta.data[,c("cnt","rep","preAnno2")]
stat_preAnno2.s <- stat_preAnno2 %>%
group_by(cnt,rep,preAnno2) %>%
summarise(count=n()) %>%
mutate(prop= count/sum(count)) %>%
ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno2 <- stat_preAnno2.s %>%
ggplot(aes(x = preAnno2, y = 100*prop, color=cnt)) +
geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
theme_classic(base_size = 15) +
scale_color_manual(values = c(color.cnt), name="") +
labs(title="Cell Composition of Lung immune data, preAnno2", y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
geom_point(aes(x=preAnno2, y = 100*prop, color= cnt),
position = position_dodge(0.75),
shape=16,alpha=0.75,size=2.15,
stroke=0.15, show.legend=F)
pcom.preAnno2
glm.nb - anova.LRT
Sort.N <- c("LUNG.CTL","LUNG.CKO")
glm.nb_anova.preAnno2 <- list()
for(x1 in 1:2){
for(x2 in 1:2){
if(x2>x1){
stat_preAnno2.s_N <- stat_preAnno2.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
total.N <- stat_preAnno2.s_N %>%
group_by(cnt, rep) %>%
summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
rownames(total.N) <- paste0(as.character(total.N$cnt),
"_",
as.character(total.N$rep))
stat_preAnno2.s_N$total <- total.N[paste0(stat_preAnno2.s_N$cnt,
"_",
stat_preAnno2.s_N$rep),"total"]
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
for(AA in levels(GEX.pure$preAnno2)){
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error = function(e){
NA
})
})
})
}
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]])
}
}
}
glm.nb_anova.preAnno2_df1 <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno2)))
rownames(glm.nb_anova.preAnno2_df1) <- names(glm.nb_anova.preAnno2)
colnames(glm.nb_anova.preAnno2_df1) <- gsub("X","C",colnames(glm.nb_anova.preAnno2_df1))
glm.nb_anova.preAnno2_df1
## Bcell Tcell NK ILC2 BAS
## LUNG.CTLvsLUNG.CKO 0.5027616 2.101569e-27 0.001200813 1.043013e-05 0.4982111
## EOS NEU pDC cDC migDC
## LUNG.CTLvsLUNG.CKO 0.9613576 0.01760329 0.9439911 0.1242673 0.1516296
## Monocyte IM AM
## LUNG.CTLvsLUNG.CKO 0.06729544 0.02171049 0.05362246
round(glm.nb_anova.preAnno2_df1,4)
## Bcell Tcell NK ILC2 BAS EOS NEU pDC cDC
## LUNG.CTLvsLUNG.CKO 0.5028 0 0.0012 0 0.4982 0.9614 0.0176 0.944 0.1243
## migDC Monocyte IM AM
## LUNG.CTLvsLUNG.CKO 0.1516 0.0673 0.0217 0.0536
Sort.N <- c("BALF.CTL","BALF.CKO")
glm.nb_anova.preAnno2 <- list()
for(x1 in 1:2){
for(x2 in 1:2){
if(x2>x1){
stat_preAnno2.s_N <- stat_preAnno2.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
total.N <- stat_preAnno2.s_N %>%
group_by(cnt, rep) %>%
summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
rownames(total.N) <- paste0(as.character(total.N$cnt),
"_",
as.character(total.N$rep))
stat_preAnno2.s_N$total <- total.N[paste0(stat_preAnno2.s_N$cnt,
"_",
stat_preAnno2.s_N$rep),"total"]
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
for(AA in levels(GEX.pure$preAnno2)){
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error = function(e){
NA
})
})
})
}
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]])
}
}
}
glm.nb_anova.preAnno2_df2 <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno2)))
rownames(glm.nb_anova.preAnno2_df2) <- names(glm.nb_anova.preAnno2)
colnames(glm.nb_anova.preAnno2_df2) <- gsub("X","C",colnames(glm.nb_anova.preAnno2_df2))
glm.nb_anova.preAnno2_df2
## Bcell Tcell NK ILC2 BAS EOS
## BALF.CTLvsBALF.CKO 0.4940555 0.7424084 0.01437989 0.01182911 0.48015 0.6517545
## NEU pDC cDC migDC Monocyte
## BALF.CTLvsBALF.CKO 0.3903129 0.08739859 0.9313036 0.001902429 0.06470806
## IM AM
## BALF.CTLvsBALF.CKO 0.9023446 0.8368056
round(glm.nb_anova.preAnno2_df2,4)
## Bcell Tcell NK ILC2 BAS EOS NEU pDC
## BALF.CTLvsBALF.CKO 0.4941 0.7424 0.0144 0.0118 0.4802 0.6518 0.3903 0.0874
## cDC migDC Monocyte IM AM
## BALF.CTLvsBALF.CKO 0.9313 0.0019 0.0647 0.9023 0.8368
rbind(glm.nb_anova.preAnno2_df1,
glm.nb_anova.preAnno2_df2)
## Bcell Tcell NK ILC2 BAS
## LUNG.CTLvsLUNG.CKO 0.5027616 2.101569e-27 0.001200813 1.043013e-05 0.4982111
## BALF.CTLvsBALF.CKO 0.4940555 7.424084e-01 0.014379887 1.182911e-02 0.4801500
## EOS NEU pDC cDC migDC
## LUNG.CTLvsLUNG.CKO 0.9613576 0.01760329 0.94399112 0.1242673 0.151629648
## BALF.CTLvsBALF.CKO 0.6517545 0.39031287 0.08739859 0.9313036 0.001902429
## Monocyte IM AM
## LUNG.CTLvsLUNG.CKO 0.06729544 0.02171049 0.05362246
## BALF.CTLvsBALF.CKO 0.06470806 0.90234460 0.83680561
calculate in individual script
#grep("Rxr|Rar",rownames(GEX.pure),value = T)
#VlnPlot(GEX.pure, features = grep("Rxr|Rar",rownames(GEX.seur),value = T),
# ncol = 2, group.by = "preAnno2", split.by = "cnt", cols = color.cnt[1:4])